*************  Procedure to calculate Comparable DHS Wealth Index  ***********.
*************  Methodology devised by Shea O. Rutstein, Ph.D.          ***********.
*************  Algorithm by Shea O. Rutstein                                     ***********.
*************  Programmed in SPSS version 19 by Shea O. Rutstein  ***********.
*************  Some regression code by Trevor Croft                                         ***********.

*  Step 1.
***** Get Standard Recode SPSS .sav file for household (with members).
* NOTE(review): no GET FILE command is issued in this script; the household file is presumably already open as the active dataset - confirm before running.

* Renames the incoming unmetbn variable to ubn; ubn is recomputed further down from ubna, ubnb, ubnd and ubne.
rename variables unmetbn=ubn. 
*  Step 2.
***** Calculate unmet basic needs  -- now done with CSPRO.
*     Step 2a:  Inadequate housing construction--dirt floor or natural or rustic walls.
*compute ubn1=0.
*if (hv213 ge 11 and hv213 le 19) ubn1=1.
*if (hv214 ge 11 and hv214 le 29) ubn1=1.
*VARIABLE LABELS ubn1 "Has inadequate housing construction".

*     Step 2b.  Household crowding--more than 3 persons per room (will use 4+ per sleeping room).
*compute ubn2=0.
*compute srooms=hv216.
*if (missing(hv216) or hv216=0) srooms=1.
*compute persons=hv012.
*if (missing(hv012) or hv012=0) persons=hv013.
*if (persons gt 0 and persons/srooms ge 4) ubn2=1.
*variable labels ubn2 "Household crowding: 4+ per sleeping room".

*     Step 2c.  Children of primary school age who do not go to school.

*compute ubn3=0.
*do repeat age=hv105$01 to hv105$36
                /dejure=hv102$01 to hv102$36
                /inschool=hv121$01 to hv121$36.
*if (age ge 7 and age le 11 and dejure=1 and inschool eq 0) ubn3=1.
*end repeat.
*variable labels ubn3 'Primary school children do not attend school'.

*     Step 2d.  High dependency on less educated head.  (uses 4+ persons with head less than completed primary education).
*compute ubn4=0.
*if (hv109$01 lt 2 and hv012 ge 4) ubn4=1.
*VARIABLE LABELS ubn4 "High dependency on less educated head of household".

*     Step 2e.  Inadequate toilet facilities.
****  Country-specific  ***.
*compute ubn5=0.
*if (hv225=1) ubn5=1.
*if (not((hv205 ge 11 and hv205 le 15) or hv205 eq 22 or hv205 eq 41)) ubn5=1.
*if (sanitlad ne 1) ubn5=1.
*variable labels ubn5 "Sanitary facility is not improved and unshared".

*     Step2 sum.
*compute ubn=ubn1+ubn2+ubn3+ubn4+ubn5.

* Name the open household file so that later DATASET ACTIVATE commands can return to it.
DATASET NAME hhrecode.

* Wealth score is hv271w divided by 100000; household weight is hv005 divided by 1000000.
COMPUTE wscore = hv271w / 100000.
COMPUTE hweight = hv005 / 1000000.
EXECUTE.

* Optional working directory for the temporary files written by this script.
*cd 'c:\hnp2a\cross country'.

WEIGHT BY hweight.

* Unmet basic needs count, leaving out the education indicator for children.
COMPUTE ubn = ubna + ubnb + ubnd + ubne.
EXECUTE.

* Weighted distribution of each indicator and of their sum.
FREQUENCIES VARIABLES=ubna ubnb ubnd ubne ubn.

SORT CASES BY ubn.


* Route the Frequencies table produced below into a dataset named temp1.sav.
DATASET DECLARE temp1.sav.
OMS /SELECT TABLES
    /IF COMMANDS=['Frequencies'] SUBTYPES=['Frequencies']
    /DESTINATION FORMAT=SAV NUMBERED=TableNumber_ OUTFILE='temp1.sav'.
* Only the distribution of ubn is captured; change the variable here if another one is needed.
FREQUENCIES VARIABLES=ubn /ORDER=ANALYSIS.
OMSEND.
* Weighting is switched off again once the table has been captured.
WEIGHT OFF.

* Turn the captured frequency table of ubn into one row holding ptile1 to ptile4.
* ptileK is 100 minus the CumulativePercent of the row whose value (var2) is K-1.
* ptile1 defaults to 100 and ptile2 to ptile4 default to 0 when the matching row is absent.
* Scratch (#) variables start at 0 and are not reset from case to case.
* A value assigned on its own row is therefore still held when the Total row is reached.
* This replaces LAG steps keyed to fixed case numbers, which only worked for one table layout.
* The earlier code gave wrong values on the Total row whenever a row for ubn=0 was present.
* To add a fifth indicator, add #ptile5 for var2="4" and ptile5 in the same way.
DATASET ACTIVATE temp1.sav.

IF ($casenum = 1) #ptile1 = 100.
IF (var2 = "0") #ptile1 = 100 - CumulativePercent.
IF (var2 = "1") #ptile2 = 100 - CumulativePercent.
IF (var2 = "2") #ptile3 = 100 - CumulativePercent.
IF (var2 = "3") #ptile4 = 100 - CumulativePercent.

* nullvar is the merge key; it is 1 only on the Total row that is kept below.
COMPUTE nullvar = 0.
IF (var2 EQ 'Total') nullvar = 1.

* Copy the running scratch values into permanent variables on every row.
COMPUTE ptile1 = #ptile1.
COMPUTE ptile2 = #ptile2.
COMPUTE ptile3 = #ptile3.
COMPUTE ptile4 = #ptile4.
EXECUTE.

* Keep only the Total row, which now carries all four percentile values.
SELECT IF (var2 = 'Total').
* Explicit EXECUTE so the selection is applied before the household file is re-activated.
EXECUTE.

* Attach the single percentile row to every household through a constant key.
DATASET ACTIVATE hhrecode.
COMPUTE nullvar = 1.
EXECUTE.

* temp1.sav acts as a lookup table keyed on nullvar.
MATCH FILES
  /FILE = *
  /TABLE = 'temp1.sav'
  /BY nullvar.
EXECUTE.

* The lookup dataset is no longer needed once its columns are merged.
DATASET CLOSE temp1.sav.

* Order households from lowest to highest wealth score so that weights can be accumulated in that order.
sort cases by wscore.
** Accumulate household weights.
* sumwts is a running total: the first case starts it and each later case adds its weight to the previous total via LAG.
if ($casenum=1) sumwts=hweight.
if ($casenum>1) sumwts=hweight+lag(sumwts).
execute.

* Start every cut variable as system-missing; only cases that pass the tests below receive a value.
*compute cut5=$sysmis.
compute cut4=$sysmis.
compute cut3=$sysmis.
compute cut2=$sysmis.
compute cut1=$sysmis.

* A case is flagged with its own wscore once sumwts/frequency has reached ptileK/100.
* NOTE(review): frequency is presumably the Frequency column of the Total row merged in from temp1.sav - confirm.
* The AGGREGATE that follows this block takes the minimum of the flagged wscore values for cut4, cut3 and cut2.
*compute #totnum=$casenum.
*if (ptile5/100 le sumwts/frequency) cut5=wscore.
* or should we use =(wscore+lag(wscore))/2 ? but what happens to first case or no ubn=5 ? .
if (ptile4/100 le sumwts/frequency) cut4=wscore.
if (ptile3/100 le sumwts/frequency) cut3=wscore.
if (ptile2/100 le sumwts/frequency) cut2=wscore.
if (ptile1/100 le sumwts/frequency) cut1=wscore.
execute.


*AGGREGATE
  /OUTFILE=* MODE=ADDVARIABLES OVERWRITE=yes
  /PRESORTED
  /BREAK=nullvar
  /cut5_min=MIN(cut5) 
  /cut4_min=MIN(cut4) 
  /cut3_min=MIN(cut3) 
  /cut2_min=MIN(cut2) 
  /cut1_min=MIN(cut1).

* Without education.
*AGGREGATE
  /OUTFILE=* MODE=ADDVARIABLES OVERWRITE=yes
  /PRESORTED
  /BREAK=nullvar
  /cut4_min=MIN(cut4) 
  /cut3_min=MIN(cut3) 
  /cut2_min=MIN(cut2) 
  /cut1_min=MIN(cut1).

* The smallest flagged wealth score per cut variable is added to every case as cutK_min.
AGGREGATE /OUTFILE=* MODE=ADDVARIABLES OVERWRITE=YES
  /PRESORTED
  /BREAK = nullvar
  /cut4_min = MIN(cut4)
  /cut3_min = MIN(cut3)
  /cut2_min = MIN(cut2).

* Baseline values written next to each cut point.
* Earlier variants also used #cut5base=-1.79190 and #cut1base=-0.08100 with cut5_min and cut1_min.
COMPUTE #cut4base = -1.58845.
COMPUTE #cut3base = -1.22602.
COMPUTE #cut2base = -0.74636.

* Write one record per cut point, once only, from the first case.
DO IF ($casenum = 1).
WRITE OUTFILE='.\temp2.dat'
    / cut4_min (F9.5) #cut4base (F9.5)
    / cut3_min (F9.5) #cut3base (F9.5)
    / cut2_min (F9.5) #cut2base (F9.5).
END IF.

EXECUTE.

* Logistic regressions of asset ownership on the wealth score.
* The Variables in the Equation tables are captured into a dataset named temp3.sav.
* Open question carried over from the original author: should weights be used here.

DATASET ACTIVATE hhrecode.
WEIGHT BY hweight.

DATASET DECLARE temp3.sav.
OMS /SELECT TABLES
    /IF COMMANDS=['Logistic Regression'] SUBTYPES=['Variables in the Equation']
    /DESTINATION FORMAT=SAV NUMBERED=TableNumber_ OUTFILE='temp3.sav'.

* Television (hv208).
LOGISTIC REGRESSION VARIABLES hv208
  /METHOD=ENTER wscore
  /CRITERIA=PIN(0.05) POUT(0.10) ITERATE(20) CUT(0.5).

* Refrigerator (HV209).
LOGISTIC REGRESSION VARIABLES HV209
  /METHOD=ENTER wscore
  /CRITERIA=PIN(0.05) POUT(0.10) ITERATE(20) CUT(0.5).

* Car or truck (HV212).
LOGISTIC REGRESSION VARIABLES HV212
  /METHOD=ENTER wscore
  /CRITERIA=PIN(0.05) POUT(0.10) ITERATE(20) CUT(0.5).

* Landline telephone (HV221).
LOGISTIC REGRESSION VARIABLES HV221
  /METHOD=ENTER wscore
  /CRITERIA=PIN(0.05) POUT(0.10) ITERATE(20) CUT(0.5).

OMSEND.

* Derive one cut point per asset from the captured logistic regression tables.
* Each record written to temp4.dat holds the computed cut point followed by its baseline value.
* The cut point is -B/lag(B): the B of the Constant row divided by the B of the row before it, negated.
* NOTE(review): the preceding row is presumably the wscore coefficient of the same table - confirm.
* NOTE(review): tables 2, 4, 6 and 8 are presumably the Step 1 tables of the TV, refrigerator, car/truck and telephone models, in the order they were run - confirm.
dataset activate temp3.sav.

* Television.
do if (TableNumber_=2 and var1="Step 1" and var2="Constant").
compute #tv=-B/lag(B).
compute #tvbase=-0.72445.
write outfile='.\temp4.dat'
    / #tv (f9.5) #tvbase (f9.5) .
end if.

* Refrigerator.
do if (TableNumber_=4 and var1="Step 1" and var2="Constant").
compute #ref=-B/lag(B).
compute #refbase=1.23848.
write outfile='.\temp4.dat'
      / #ref (f9.5) #refbase  (f9.5) .
end if.

* Car or truck; the redundant second assignment of #carbase after the WRITE has been removed.
do if (TableNumber_=6 and var1="Step 1" and var2="Constant").
compute #car=-B/lag(B).
compute #carbase=3.50603.
write outfile='.\temp4.dat'
    / #car (f9.5) #carbase (f9.5) .
end if.

* Landline telephone.
do if (TableNumber_=8 and var1="Step 1" and var2="Constant").
compute #phone=-B/lag(B).
compute #phonebase=0.99461.
write outfile='.\temp4.dat'
    / #phone (f9.5) #phonebase  (f9.5) .
end if.
execute.

* Release the dataset name, then save the still-active data to disk.
dataset close temp3.sav.
save OUTFILE='.\temp3.sav'.

** Concatenate output data files for UBN and TV-Ref.
* temp2.dat holds the UBN cut points and temp4.dat holds the asset cut points.
* /b forces a binary copy: when combining files, copy defaults to ASCII mode and appends a Ctrl-Z character.
* That character would be read below as an extra record with invalid data.
* /y suppresses the overwrite prompt when regdat.dat is left over from an earlier run.
*cd 'c:\hnp2a\cross country'.
host command=['copy /y /b .\temp2.dat+.\temp4.dat .\regdat.dat'].

**  Now read in data to be able to do regression.
* Each record holds two 9-column fields, matching the f9.5 formats used when the files were written.

data list file='.\regdat.dat'
   / cpc 1-9 cpb 10-18.
execute.
variable labels cpc 'Compared cut points'
                       /cpb 'Baseline cut points'.
DATASET NAME regdat.

* Regress the baseline cut points on the compared cut points.
* The regression writes its estimates to the dataset compcorv declared here.
DATASET DECLARE compcorv.
REGRESSION
  /MISSING LISTWISE
  /STATISTICS COEFF OUTS R ANOVA
  /CRITERIA=PIN(.05) POUT(.10)
  /NOORIGIN
  /DEPENDENT cpb
  /METHOD=ENTER cpc
  /OUTFILE=corv(compcorv).

* Keep only the row of estimates from the regression output.
DATASET ACTIVATE compcorv.
SELECT IF (ROWTYPE_ = 'EST').
EXECUTE.

* Drop the columns that are not needed for the merge.
DELETE VARIABLES DEPVAR_ VARNAME_.
* CONST_ becomes cpcconst (intercept) and cpc becomes cpccoeff (slope).
RENAME VARIABLES CONST_ = cpcconst cpc = cpccoeff.

* Back to the household data; give every case the key value used by the estimates row.
DATASET ACTIVATE hhrecode.
USE ALL.
STRING ROWTYPE_ (A8).
COMPUTE ROWTYPE_ = 'EST'.

* Attach the intercept and slope to every household.
MATCH FILES
  /FILE = *
  /TABLE = compcorv
  /BY ROWTYPE_.
EXECUTE.

* Tidy up the helper datasets and leave hhrecode active.
DATASET CLOSE regdat.
DATASET CLOSE compcorv.
DATASET ACTIVATE hhrecode.

* Comparative wealth score: the merged regression intercept plus slope times the wealth score.
COMPUTE compscor = cpcconst + cpccoeff * wscore.
VARIABLE LABELS compscor "Comparative wealth score".
FORMATS compscor (F11.5).
EXECUTE.

* Comparative quintiles from fixed cut points on the comparative score.
* A score equal to a boundary falls in the lower group because the first matching range wins.
RECODE compscor
  (lo thru -0.90802=1)
  (-0.90802 thru -0.38583=2)
  (-0.38583 thru -0.01189=3)
  (-0.01189 thru 0.74156=4)
  (0.74156 thru hi=5)
  INTO compquin.
EXECUTE.

* Export one record per household, ordered by household id.
SORT CASES BY hhid.

* Household weight multiplied by hv012 and rescaled by 1000000.
COMPUTE hhmemwt = HV005 * hv012 / 1000000.
VARIABLE LABELS hhmemwt 'HH members weighting for Index'.

* Output formats used by the WRITE below.
FORMATS wscore compscor hhmemwt (F11.5) hv270w compquin (F2.0).

WRITE OUTFILE='.\compwealth.dat'
   / hhid wscore hv270w compscor compquin hv005 hhmemwt.
EXECUTE.

* Optional clean-up of the temporary text files; left disabled.
*host command=['del .\temp*.dat' 'del .\regdat.dat'].

* Weighted summary of the comparative score.
WEIGHT BY hweight.
DESCRIPTIVES VARIABLES=compscor.

* Save the household file without the merge keys, the captured table columns and the cut flags.
* A variant that included a fifth indicator dropped cut5 to cut1 instead of cut4 to cut1.
SAVE OUTFILE='.\compwlth.sav'
   /DROP=nullvar TableNumber_ Command_ Subtype_ Label_ Var1 Var2
         Frequency Percent ValidPercent CumulativePercent
         cut4 TO cut1 ROWTYPE_.

* Capture the Descriptive Statistics table for compscor into a dataset named meansd.sav.
DATASET DECLARE meansd.sav.
OMS /SELECT TABLES
    /IF COMMANDS=['Descriptives'] SUBTYPES=['Descriptive Statistics'] INSTANCES=1
    /DESTINATION FORMAT=SAV NUMBERED=TableNumber_ OUTFILE='meansd.sav'.
DESCRIPTIVES VARIABLES=compscor.
OMSEND.

* Disabled export of the mean and standard deviation to a text file.
*dataset activate meansd.sav.
*select if $casenum=1.
*write outfile='.\meansd.dat'
   / mean std.deviation.
*execute.

* Store the Viewer output of this run.
OUTPUT SAVE OUTFILE='.\compout.spv'.





